<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
    <meta name="keywords" content="java,后端,程序员">
    <meta name="description" content="Hexo Theme Keep">
    <meta name="author" content="Axuan">
    
    <title>
        
            反射和模板实际应用:Java爬虫 |
        
        阿轩的代码世界
    </title>
    
<link rel="stylesheet" href="/css/style.css">

    <link rel="shortcut icon" href="/images/logo.svg">
    
<link rel="stylesheet" href="/css/font-awesome.min.css">

    <script id="hexo-configurations">
    let KEEP = window.KEEP || {};
    KEEP.hexo_config = {"hostname":"blog.ctdx.store","root":"/","language":"zh-CN"};
    KEEP.theme_config = {"toc":{"enable":true,"number":false,"expand_all":false,"init_open":false},"style":{"primary_color":"#0066CC","avatar":"/images/avatar.svg","favicon":"/images/logo.svg","article_img_align":"left","left_side_width":"260px","content_max_width":"920px","hover":{"shadow":false,"scale":false},"first_screen":{"enable":true,"background_img":"/images/bg2.jpg","description":null},"scroll":{"progress_bar":{"enable":true},"percent":{"enable":false}}},"local_search":{"enable":true,"preload":true},"code_copy":{"enable":true,"style":"default"},"pjax":{"enable":false},"lazyload":{"enable":true},"version":"3.4.5"};
    KEEP.language_ago = {"second":"%s 秒前","minute":"%s 分钟前","hour":"%s 小时前","day":"%s 天前","week":"%s 周前","month":"%s 个月前","year":"%s 年前"};
  </script>
<meta name="generator" content="Hexo 6.0.0"><link rel="alternate" href="/atom.xml" title="阿轩的代码世界" type="application/atom+xml">
</head>


<body>
<div class="progress-bar-container">
    
        <span class="scroll-progress-bar"></span>
    

    
</div>


<main class="page-container">

    

    <div class="page-main-content">

        <div class="page-main-content-top">
            <header class="header-wrapper">

    <div class="header-content">
        <div class="left">
            
            <a class="logo-title" href="/">
                阿轩的代码世界
            </a>
        </div>

        <div class="right">
            <div class="pc">
                <ul class="menu-list">
                    
                        <li class="menu-item">
                            <a class=""
                               href="/"
                            >
                                首页
                            </a>
                        </li>
                    
                        <li class="menu-item">
                            <a class=""
                               href="/archives"
                            >
                                归档
                            </a>
                        </li>
                    
                        <li class="menu-item">
                            <a class=""
                               href="/tags"
                            >
                                标签
                            </a>
                        </li>
                    
                        <li class="menu-item">
                            <a class=""
                               href="/links"
                            >
                                友链
                            </a>
                        </li>
                    
                        <li class="menu-item">
                            <a class=""
                               href="/about"
                            >
                                关于
                            </a>
                        </li>
                    
                    
                        <li class="menu-item search search-popup-trigger">
                            <i class="fas fa-search"></i>
                        </li>
                    
                </ul>
            </div>
            <div class="mobile">
                
                    <div class="icon-item search search-popup-trigger"><i class="fas fa-search"></i></div>
                
                <div class="icon-item menu-bar">
                    <div class="menu-bar-middle"></div>
                </div>
            </div>
        </div>
    </div>

    <div class="header-drawer">
        <ul class="drawer-menu-list">
            
                <li class="drawer-menu-item flex-center">
                    <a class=""
                       href="/">首页</a>
                </li>
            
                <li class="drawer-menu-item flex-center">
                    <a class=""
                       href="/archives">归档</a>
                </li>
            
                <li class="drawer-menu-item flex-center">
                    <a class=""
                       href="/tags">标签</a>
                </li>
            
                <li class="drawer-menu-item flex-center">
                    <a class=""
                       href="/links">友链</a>
                </li>
            
                <li class="drawer-menu-item flex-center">
                    <a class=""
                       href="/about">关于</a>
                </li>
            
        </ul>
    </div>

    <div class="window-mask"></div>

</header>


        </div>

        <div class="page-main-content-middle">

            <div class="main-content">

                
                    <div class="fade-in-down-animation">
    <div class="article-content-container">

        <div class="article-title">
            <span class="title-hover-animation">反射和模板实际应用:Java爬虫</span>
        </div>

        
            <div class="article-header">
                <div class="avatar">
                    <img src="/images/avatar.svg">
                </div>
                <div class="info">
                    <div class="author">
                        <span class="name">Axuan</span>
                        
                            <span class="author-label">Lv1</span>
                        
                    </div>
                    <div class="meta-info">
                        <div class="article-meta-info">
    <span class="article-date article-meta-item">
        <i class="fas fa-edit"></i>&nbsp;
        <span class="pc">2022-03-12 15:00:58</span>
        <span class="mobile">2022-03-12 15:00</span>
    </span>
    
    
        <span class="article-tags article-meta-item">
            <i class="fas fa-tags"></i>&nbsp;
            <ul>
                
                    <li>
                        <a href="/tags/java/">java</a>&nbsp;
                    </li>
                
            </ul>
        </span>
    

    
    
        <span class="article-wordcount article-meta-item">
            <i class="fas fa-file-word"></i>&nbsp;<span>1.4k 字</span>
        </span>
    
    
        <span class="article-min2read article-meta-item">
            <i class="fas fa-clock"></i>&nbsp;<span>7 分钟</span>
        </span>
    
    
</div>

                    </div>
                </div>
            </div>
        

        <div class="article-content markdown-body">
            <h3 id="反射"><a href="#反射" class="headerlink" title="反射:"></a>反射:</h3><p>在运行状态中，对于任意一个类，都能够知道这个类的所有属性和方法；对于任意一个对象，都能够调用它的任意一个方法和属性；这种动态获取的信息以及动态调用对象的方法的功能称为java语言的反射机制。</p>
<h3 id="模板方法模式"><a href="#模板方法模式" class="headerlink" title="模板方法模式:"></a>模板方法模式:</h3><p>如果编写一个抽象父类，父类提供了多个子类的通用方法，并把一个或多个方法留给其子类去实现，这就是模板模式，是一种十分常见且简单的设计模式。</p>
<hr>
<p>这两天,帮<strong>我的朋友</strong>写了一个简单的爬虫,爬取了一些资源,正好在写代码的时候用到了反射和模板,分享一下知识</p>
<p>爬虫项目基于SpringBoot,先看下Pom文件</p>
<figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag">&lt;<span class="name">parent</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.springframework.boot<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>spring-boot-starter-parent<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">version</span>&gt;</span>2.6.4<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">relativePath</span>/&gt;</span> <span class="comment">&lt;!-- lookup parent from repository --&gt;</span></span><br><span class="line">  <span class="tag">&lt;/<span class="name">parent</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>com.xuan<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>html-parser<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">version</span>&gt;</span>0.0.1-SNAPSHOT<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">name</span>&gt;</span>html-parser<span class="tag">&lt;/<span class="name">name</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">description</span>&gt;</span>html-parser<span class="tag">&lt;/<span class="name">description</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">properties</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">java.version</span>&gt;</span>1.8<span class="tag">&lt;/<span class="name">java.version</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;/<span class="name">properties</span>&gt;</span></span><br><span class="line">  <span class="tag">&lt;<span class="name">dependencies</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.jsoup<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>jsoup<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">version</span>&gt;</span>1.14.3<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line"></span><br><span class="line">      <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>cn.hutool<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>hutool-all<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">version</span>&gt;</span>5.7.21<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line"></span><br><span class="line">      <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>com.fasterxml.jackson.core<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">          <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>jackson-databind<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">      <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line"> <span class="tag">&lt;/<span class="name">dependencies</span>&gt;</span></span><br></pre></td></tr></table></figure>

<p>因为功能简单,只引入了Jsoup(html解析),Hutoll(工具类合集)</p>
<p>我们创建一个注解类,之后用反射获取包含注解的类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">@Documented</span></span><br><span class="line"><span class="meta">@Retention(RetentionPolicy.RUNTIME)</span></span><br><span class="line"><span class="meta">@Target(ElementType.TYPE)</span></span><br><span class="line"><span class="keyword">public</span> <span class="meta">@interface</span> SaveHandler &#123;</span><br><span class="line">    String <span class="title function_">value</span><span class="params">()</span> <span class="keyword">default</span> <span class="string">&quot;&quot;</span>;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>创建一个结果存储的实体类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">ParseResult</span> &#123;</span><br><span class="line">    <span class="keyword">private</span> String title;</span><br><span class="line">    <span class="keyword">private</span> String url;</span><br><span class="line">    <span class="keyword">private</span> String upTime;</span><br><span class="line">    <span class="keyword">private</span> String type;</span><br><span class="line">    <span class="keyword">private</span> String detail;</span><br><span class="line">    <span class="keyword">private</span> String torrent;</span><br><span class="line">    ...构造函数 Getter/Setter toString</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>创建一个通用的保存爬虫文件接口类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">public</span> <span class="keyword">interface</span> <span class="title class_">BaseHandler</span> &#123;</span><br><span class="line"></span><br><span class="line">    <span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 是否开启该保存处理器</span></span><br><span class="line"><span class="comment">     * <span class="doctag">@return</span> boolean</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="type">boolean</span> <span class="title function_">isEnabled</span><span class="params">()</span>;</span><br><span class="line"></span><br><span class="line">    <span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 具体保存逻辑</span></span><br><span class="line"><span class="comment">     * <span class="doctag">@param</span> results 解析结果集</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="keyword">void</span> <span class="title function_">save</span><span class="params">(List&lt;ParseResult&gt; results)</span>;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>实现一个保存爬虫文的实现类,isEnabled返回true时,该类才生效,添加保存结果类需要添加@SaveHandler注解并实现BaseHandler接口</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">@SaveHandler</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">TextSaveHandler</span> <span class="keyword">implements</span> <span class="title class_">BaseHandler</span> &#123;</span><br><span class="line"></span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">final</span> <span class="type">Logger</span> <span class="variable">logger</span> <span class="operator">=</span> LoggerFactory.getLogger(TextSaveHandler.class);</span><br><span class="line"></span><br><span class="line">    <span class="type">ObjectMapper</span> <span class="variable">mapper</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">ObjectMapper</span>();</span><br><span class="line"></span><br><span class="line">    <span class="meta">@Override</span></span><br><span class="line">    <span class="keyword">public</span> <span class="type">boolean</span> <span class="title function_">isEnabled</span><span class="params">()</span> &#123;</span><br><span class="line">        <span class="keyword">return</span> <span class="literal">true</span>;</span><br><span class="line">    &#125;</span><br><span class="line"></span><br><span class="line">    <span class="meta">@Override</span></span><br><span class="line">    <span class="keyword">public</span> <span class="keyword">void</span> <span class="title function_">save</span><span class="params">(List&lt;ParseResult&gt; results)</span> &#123;</span><br><span class="line">        <span class="keyword">try</span> (<span class="type">BufferedWriter</span> <span class="variable">writer</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">BufferedWriter</span>(<span class="keyword">new</span> <span class="title class_">FileWriter</span>(<span class="string">&quot;./result.txt&quot;</span>))) &#123;</span><br><span class="line">            <span class="keyword">for</span> (ParseResult result : results) &#123;</span><br><span class="line">                writer.write(mapper.writeValueAsString(result));</span><br><span class="line">                writer.newLine();</span><br><span class="line">            &#125;</span><br><span class="line">        &#125; <span class="keyword">catch</span> (IOException e) &#123;</span><br><span class="line">            logger.error(<span class="string">&quot;写文件异常:&quot;</span> + e.getMessage(), e);</span><br><span class="line">        &#125;</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>接下来就是重要代码:</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 页面解析模板</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@author</span> wuxuan</span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">abstract</span> <span class="keyword">class</span> <span class="title class_">HtmlContentParserTemplate</span> &#123;</span><br><span class="line"></span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">final</span> <span class="type">Logger</span> <span class="variable">logger</span> <span class="operator">=</span> LoggerFactory.getLogger(HtmlContentParserTemplate.class);</span><br><span class="line"></span><br><span class="line">    <span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 解析的方法 需要实现</span></span><br><span class="line"><span class="comment">     *</span></span><br><span class="line"><span class="comment">     * <span class="doctag">@return</span> 解析结果集</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="keyword">protected</span> <span class="keyword">abstract</span> List&lt;ParseResult&gt; <span class="title function_">parseHtml</span><span class="params">()</span>;</span><br><span class="line"></span><br><span class="line">    <span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 保存策略 需要实现BaseHandler接口</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> List&lt;BaseHandler&gt; HANDLERS = <span class="keyword">new</span> <span class="title class_">LinkedList</span>&lt;&gt;();</span><br><span class="line"></span><br><span class="line">    <span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 页面解析线程池</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="keyword">protected</span> <span class="keyword">static</span> <span class="keyword">final</span> <span class="type">ThreadPoolExecutor</span> <span class="variable">THREAD_POOL</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">ThreadPoolExecutor</span>(<span class="number">10</span>, <span class="number">20</span>, <span class="number">60</span>, TimeUnit.SECONDS, <span class="keyword">new</span> <span class="title class_">LinkedBlockingDeque</span>&lt;&gt;(),</span><br><span class="line">            <span class="keyword">new</span> <span class="title class_">CustomizableThreadFactory</span>(<span class="string">&quot;html-parser-worker-&quot;</span>));</span><br><span class="line"></span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">void</span> <span class="title function_">initializersSaveHandler</span><span class="params">()</span> &#123;</span><br><span class="line">        <span class="keyword">try</span> &#123;</span><br><span class="line">            Set&lt;Class&lt;?&gt;&gt; classes = ClassUtil.scanPackage(<span class="string">&quot;com.xuan.parser.handler&quot;</span>);</span><br><span class="line">            <span class="keyword">for</span> (Class&lt;?&gt; clazz : classes) &#123;</span><br><span class="line">                <span class="keyword">if</span> (clazz.isAnnotationPresent(SaveHandler.class)) &#123;</span><br><span class="line">                    HANDLERS.add((BaseHandler) clazz.newInstance());</span><br><span class="line">                &#125;</span><br><span class="line">            &#125;</span><br><span class="line">        &#125; <span class="keyword">catch</span> (InstantiationException | IllegalAccessException e) &#123;</span><br><span class="line">            logger.error(<span class="string">&quot;加载handler类异常:&quot;</span> + e.getMessage(), e);</span><br><span class="line">        &#125;</span><br><span class="line">    &#125;</span><br><span class="line"></span><br><span class="line"> 		<span class="comment">/**</span></span><br><span class="line"><span class="comment">     * 模板的方法执行入口</span></span><br><span class="line"><span class="comment">     */</span></span><br><span class="line">    <span class="keyword">protected</span> <span class="keyword">final</span> <span class="keyword">void</span> <span class="title function_">run</span><span class="params">()</span> <span class="keyword">throws</span> Exception &#123;</span><br><span class="line">        initializersSaveHandler();</span><br><span class="line">        List&lt;ParseResult&gt; objs = parseHtml();</span><br><span class="line">        THREAD_POOL.shutdown();</span><br><span class="line">        <span class="keyword">while</span> (<span class="literal">true</span>) &#123;</span><br><span class="line">            <span class="keyword">if</span> (THREAD_POOL.isTerminated()) &#123;</span><br><span class="line">                System.out.println(<span class="string">&quot;HtmlParse 线程全部结束&quot;</span>);</span><br><span class="line">                <span class="keyword">break</span>;</span><br><span class="line">            &#125;</span><br><span class="line">            Thread.sleep(<span class="number">1000</span>);</span><br><span class="line">        &#125;</span><br><span class="line">        <span class="keyword">if</span> (objs.size() == <span class="number">0</span>) &#123;</span><br><span class="line">            logger.warn(<span class="string">&quot;解析结果集为空,任务结束&quot;</span>);</span><br><span class="line">            <span class="keyword">return</span>;</span><br><span class="line">        &#125;</span><br><span class="line">        <span class="keyword">for</span> (BaseHandler handler : HANDLERS) &#123;</span><br><span class="line">            <span class="keyword">if</span> (handler.isEnabled()) &#123;</span><br><span class="line">                handler.save(objs);</span><br><span class="line">            &#125;</span><br><span class="line">        &#125;</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br><span class="line"></span><br></pre></td></tr></table></figure>

<p>parseHtml方法留给实现类去实现,毕竟每个解析的页面逻辑不一样,需要定制化处理</p>
<p>initializersSaveHandler方法初始化保存结果类,使用HuTool类的scanPackage方法通过反射拿到包下的所有的类,循环类的合集判断该类是否包含之前写好的注解类</p>
<p>若该类包含注解类,则实例化该类,加入List集合</p>
<p>run方法是模板方法模式的入口方法类,该类规定了模板方法执行顺序,因为run有final修饰,防止重写该类,破坏模板执行顺序</p>
<p>最后我们创建一个实现类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">@Component(&quot;caoLiu&quot;)</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">CaoliuSheQuTask</span> <span class="keyword">extends</span> <span class="title class_">HtmlContentParserTemplate</span> &#123;</span><br><span class="line"></span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">final</span> <span class="type">Logger</span> <span class="variable">logger</span> <span class="operator">=</span> LoggerFactory.getLogger(CaoliuSheQuTask.class);</span><br><span class="line"></span><br><span class="line">    <span class="meta">@Override</span></span><br><span class="line">    <span class="keyword">protected</span> List&lt;ParseResult&gt; <span class="title function_">parseHtml</span><span class="params">()</span> &#123;</span><br><span class="line">        <span class="keyword">final</span> List&lt;ParseResult&gt; result = Collections.synchronizedList(<span class="keyword">new</span> <span class="title class_">LinkedList</span>&lt;&gt;());</span><br><span class="line">        <span class="type">String</span> <span class="variable">baseUrl</span> <span class="operator">=</span> <span class="string">&quot;xxxxxx&quot;</span>;</span><br><span class="line">        <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span> <span class="number">1</span>; i &lt;= <span class="number">200</span>; i++) &#123;</span><br><span class="line">            <span class="type">int</span> <span class="variable">page</span> <span class="operator">=</span> i;</span><br><span class="line">            THREAD_POOL.execute(() -&gt; &#123;</span><br><span class="line">                <span class="keyword">try</span> &#123;</span><br><span class="line">                    <span class="type">String</span> <span class="variable">fullUrl</span> <span class="operator">=</span> baseUrl + <span class="string">&quot;thread.php?fid=5&amp;page=&quot;</span> + page;</span><br><span class="line">                    logger.info(<span class="string">&quot;线程:&quot;</span> + Thread.currentThread().getName() + <span class="string">&quot;开始任务:&quot;</span> + fullUrl);</span><br><span class="line">                    <span class="type">String</span> <span class="variable">html</span> <span class="operator">=</span> HttpUtil.get(fullUrl);</span><br><span class="line">                    <span class="type">Document</span> <span class="variable">root</span> <span class="operator">=</span> Jsoup.parse(html);</span><br><span class="line">                    <span class="type">Elements</span> <span class="variable">table</span> <span class="operator">=</span> root.select(<span class="string">&quot;table&quot;</span>).select(<span class="string">&quot;.t_one&quot;</span>);</span><br><span class="line">                    List&lt;Element&gt; elements = table.stream().filter(item -&gt; <span class="string">&quot;center&quot;</span>.equals(item.attr(<span class="string">&quot;align&quot;</span>))).collect(Collectors.toList());</span><br><span class="line">                    elements.forEach(item -&gt; &#123;</span><br><span class="line">                        <span class="type">Elements</span> <span class="variable">a</span> <span class="operator">=</span> item.select(<span class="string">&quot;td&quot;</span>).get(<span class="number">1</span>).select(<span class="string">&quot;h3&quot;</span>).select(<span class="string">&quot;a&quot;</span>);</span><br><span class="line">                        <span class="type">String</span> <span class="variable">title</span> <span class="operator">=</span> a.text();</span><br><span class="line">                        <span class="type">String</span> <span class="variable">url</span> <span class="operator">=</span> a.attr(<span class="string">&quot;href&quot;</span>);</span><br><span class="line">                        <span class="type">String</span> <span class="variable">upTime</span> <span class="operator">=</span> item.select(<span class="string">&quot;td&quot;</span>).last().select(<span class="string">&quot;a&quot;</span>).text();</span><br><span class="line">                        <span class="type">String</span> <span class="variable">secondHtml</span> <span class="operator">=</span> HttpUtil.get(baseUrl + url);</span><br><span class="line">                        <span class="type">Document</span> <span class="variable">parse</span> <span class="operator">=</span> Jsoup.parse(secondHtml);</span><br><span class="line">                        <span class="type">Elements</span> <span class="variable">resourceInfo</span> <span class="operator">=</span> parse.select(<span class="string">&quot;#read_tpc&quot;</span>);</span><br><span class="line">                        <span class="type">String</span> <span class="variable">info</span> <span class="operator">=</span> resourceInfo.text().replace(<span class="string">&quot;【&quot;</span>, <span class="string">&quot;\n【&quot;</span>);</span><br><span class="line">                        <span class="type">String</span> <span class="variable">resourceUrl</span> <span class="operator">=</span> resourceInfo.select(<span class="string">&quot;a&quot;</span>).attr(<span class="string">&quot;href&quot;</span>);</span><br><span class="line">                        result.add(<span class="keyword">new</span> <span class="title class_">ParseResult</span>(title, baseUrl + url, upTime, <span class="string">&quot;分类1&quot;</span>, info, resourceUrl));</span><br><span class="line">                        logger.info(Thread.currentThread().getName() + <span class="string">&quot;任务完成&quot;</span>);</span><br><span class="line">                    &#125;);</span><br><span class="line">                &#125; <span class="keyword">catch</span> (Exception e) &#123;</span><br><span class="line">                    logger.error(<span class="string">&quot;解析异常:&quot;</span> + e.getMessage(), e);</span><br><span class="line">                &#125;</span><br><span class="line">            &#125;);</span><br><span class="line">        &#125;</span><br><span class="line">        <span class="keyword">return</span> result;</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>继承模板方法类,重写parseHtml方法,通过Hutool的httpUtil请求拿到html内容,jsoup的parse解析成jsoup document.</p>
<p>jsoup用法简单.选择器类似jq易上手,适合做爬虫</p>

        </div>

        

        
            <ul class="post-tags-box">
                
                    <li class="tag-item">
                        <a href="/tags/java/">#java</a>&nbsp;
                    </li>
                
            </ul>
        

        
            <div class="article-nav">
                
                
                    <div class="article-next">
                        <a class="next"
                           rel="next"
                           href="/2022/03/12/SpringMvc%E6%97%A0web-xml%E9%85%8D%E7%BD%AE/"
                        >
                            <span class="title flex-center">
                                <span class="post-nav-title-item">SpringMvc无web.xml配置</span>
                                <span class="post-nav-item">下一篇</span>
                            </span>
                            <span class="right arrow-icon flex-center">
                              <i class="fas fa-chevron-right"></i>
                            </span>
                        </a>
                    </div>
                
            </div>
        

        
    </div>
</div>


                
            </div>

        </div>

        <div class="page-main-content-bottom">
            <footer class="footer">
    <div class="info-container">
        <div class="copyright-info info-item">
            &copy;
            
              <span>2020</span>
              -
            
            2022&nbsp;<i class="fas fa-heart icon-animate"></i>&nbsp;<a href="/">Axuan</a>
        </div>
        
        <div class="theme-info info-item">
            由 <a target="_blank" href="https://hexo.io">Hexo</a> 驱动&nbsp;|&nbsp;主题&nbsp;<a class="theme-version" target="_blank" href="https://github.com/XPoet/hexo-theme-keep">Keep v3.4.5</a>
        </div>
        
        
    </div>
</footer>

        </div>
    </div>

    
        <div class="post-tools">
            <div class="post-tools-container">
    <ul class="tools-list">
        <!-- TOC aside toggle -->
        
            <li class="tools-item page-aside-toggle">
                <i class="fas fa-outdent"></i>
            </li>
        

        <!-- go comment -->
        
    </ul>
</div>

        </div>
    

    <div class="right-bottom-side-tools">
        <div class="side-tools-container">
    <ul class="side-tools-list">
        <li class="tools-item tool-font-adjust-plus flex-center">
            <i class="fas fa-search-plus"></i>
        </li>

        <li class="tools-item tool-font-adjust-minus flex-center">
            <i class="fas fa-search-minus"></i>
        </li>

        <li class="tools-item tool-expand-width flex-center">
            <i class="fas fa-arrows-alt-h"></i>
        </li>

        <li class="tools-item tool-dark-light-toggle flex-center">
            <i class="fas fa-moon"></i>
        </li>

        <!-- rss -->
        

        
            <li class="tools-item tool-scroll-to-top flex-center">
                <i class="fas fa-arrow-up"></i>
            </li>
        

        <li class="tools-item tool-scroll-to-bottom flex-center">
            <i class="fas fa-arrow-down"></i>
        </li>
    </ul>

    <ul class="exposed-tools-list">
        <li class="tools-item tool-toggle-show flex-center">
            <i class="fas fa-cog fa-spin"></i>
        </li>
        
    </ul>
</div>

    </div>

    
        <aside class="page-aside">
            <div class="post-toc-wrap">
    <div class="post-toc">
        <ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%8F%8D%E5%B0%84"><span class="nav-text">反射:</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%A8%A1%E6%9D%BF%E6%96%B9%E6%B3%95%E6%A8%A1%E5%BC%8F"><span class="nav-text">模板方法模式:</span></a></li></ol>
    </div>
</div>
        </aside>
    

    <div class="image-viewer-container">
    <img src="">
</div>


    
        <div class="search-pop-overlay">
    <div class="popup search-popup">
        <div class="search-header">
          <span class="search-input-field-pre">
            <i class="fas fa-keyboard"></i>
          </span>
            <div class="search-input-container">
                <input autocomplete="off"
                       autocorrect="off"
                       autocapitalize="off"
                       placeholder="搜索..."
                       spellcheck="false"
                       type="search"
                       class="search-input"
                >
            </div>
            <span class="popup-btn-close">
                <i class="fas fa-times"></i>
            </span>
        </div>
        <div id="search-result">
            <div id="no-result">
                <i class="fas fa-spinner fa-pulse fa-5x fa-fw"></i>
            </div>
        </div>
    </div>
</div>

    

</main>




<script src="/js/utils.js"></script>

<script src="/js/main.js"></script>

<script src="/js/header-shrink.js"></script>

<script src="/js/back2top.js"></script>

<script src="/js/dark-light-toggle.js"></script>



    
<script src="/js/local-search.js"></script>




    
<script src="/js/code-copy.js"></script>




    
<script src="/js/lazyload.js"></script>



<div class="post-scripts">
    
        
<script src="/js/left-side-toggle.js"></script>

<script src="/js/libs/anime.min.js"></script>

<script src="/js/toc.js"></script>

    
</div>



</body>
</html>
